CO2 Sensor Module

Author: Maddie Robinson

This is an example of plotting CO2 data from two different csv files and the correlation between them.



In [1]:

    
# Plotting related python libraries
import matplotlib.pyplot as plt
from matplotlib import style

# Standard csv python libraries
import csv

# Python libraries for manipulating dates and times as objects
import dateutil
import time
import datetime

# Main python library for mathematical calculations
import numpy as np

# Python library for calculating p value in correlation
from scipy.stats.stats import pearsonr



In [2]:

    
def get_covariance(yfile1, yfile2):
# Calculate the correlation coefficient between the CO2 values from two separate files 

	yfile1_sigma = np.sqrt(np.var(yfile1))
	yfile2_sigma = np.sqrt(np.var(yfile2))
	yfile1_mean = np.mean(yfile1)
	yfile2_mean = np.mean(yfile2)

	files1and2_sum = 0
	for i in range(len(yfile1)):
		files1and2_sum = files1and2_sum + (yfile1[i]-yfile1_mean)*(yfile2[i]-yfile2_mean)
	correlation_coef = files1and2_sum / ((yfile1_sigma)*(yfile2_sigma)) / len(yfile1)
	return correlation_coef



In [ ]:

    
# User imputs name of first file
user_file = input("File Name 1: ")

# Store time and CO2 raw data from first file
results = csv.reader(open(user_file), delimiter=',')
times = []
CO2 = []
row_counter = 0
for r in results:
	row_counter += 1
	if row_counter>1:
		times.append(dateutil.parser.parse(r[0]))
		CO2.append(int(r[1]))

# Merge n CO2 data points
n_merge = int(input("n data points to combine:"))
ndata = len(CO2)
nsum_data = int(ndata/n_merge)

data_ave = []
data_unc = []
merge_times = []

# Calculate mean and standard deviation of first file data
# Merge times to match merged data points
for i in range(nsum_data):
	idata = CO2[i*n_merge:(i+1)*n_merge]
	idata_array = np.asarray(idata)
	CO2mean = np.mean(idata_array)
	CO2sigma = np.sqrt(np.var(idata_array))
	data_ave.append(CO2mean)
	data_unc.append(CO2sigma)
	itimes = times[i*n_merge:(i+1)*n_merge]
	itime = itimes[int(len(itimes)/2)]
	merge_times.append(itime)



In [ ]:

    
# User imputs name of second file
user_file_2 = input("File Name 2: ")

# Store time and CO2 raw data from second file
results_2 = csv.reader(open(user_file_2), delimiter=',')
times_2 = []
CO2_2 = []
row_counter = 0
for r in results_2:
	row_counter += 1
	if row_counter>1:
		times_2.append(dateutil.parser.parse(r[0]))
		CO2_2.append(int(r[1]))

# Merge n CO2 data points        
n_merge_2 = int(input("n data points to combine:"))
ndata_2 = len(CO2_2)
nsum_data_2 = int(ndata_2/n_merge_2)

data_ave_2 = []
data_unc_2 = []
merge_times_2 = []

# Calculate mean and standard deviation of second file data
# Merge times to match merged data points
for i in range(nsum_data_2):
	idata_2 = CO2_2[i*n_merge_2:(i+1)*n_merge_2]
	idata_array_2 = np.asarray(idata_2)
	CO2mean_2 = np.mean(idata_array_2)
	CO2sigma_2 = np.sqrt(np.var(idata_array_2))
	data_ave_2.append(CO2mean_2)
	data_unc_2.append(CO2sigma_2)
	itimes_2 = times_2[i*n_merge_2:(i+1)*n_merge_2]
	itime_2 = itimes_2[int(len(itimes_2)/2)]
	merge_times_2.append(itime_2)



In [ ]:

    
# Convert data averages into arrays
CO2file1_array = np.asarray(data_ave)
CO2file2_array = np.asarray(data_ave_2)
correlation_coef = get_covariance(CO2file1_array, CO2file2_array)

# Print correlation coefficient to terminal
print("Correlation: {}".format(correlation_coef))

# Print length of data averages to terminal
print("Length of touch data: {}, Length of open data: {}".format(len(data_ave),len(data_ave_2)))

# Calculate and print p value to terminal
correlation_values = pearsonr(CO2file1_array, CO2file2_array)
print("p value =", correlation_values[1])



In [ ]:

    
# Plot time vs CO2 data
fig = plt.figure()
plt.plot(merge_times, data_ave, "b.", label="File 1")
plt.plot(merge_times_2, data_ave_2, "g.", label="File 2")
plt.errorbar(merge_times, data_ave, yerr = data_unc)
plt.errorbar(merge_times_2, data_ave_2, yerr = data_unc_2)
plt.xlabel("Time")
plt.ylabel("CO2 (ppm)")
plt.legend()
plt.grid(True,color='k')
fig.autofmt_xdate()

# Plot CO2 correlation between files
fig = plt.figure()
plt.plot(CO2file1_array, CO2file2_array, "b.")
plt.xlabel("CO2 file 1")
plt.ylabel("CO2 file 2")

# Print correlation coefficient to CO2 correlation figure
# Print p vlaue to CO2 correlation figure
corr_statement = "Correlation Coefficient =", correlation_coef
p_value = "P Value =", correlation_values[1]
plt.annotate(corr_statement, xy=(0,1), xytext=(12,-12), va='top',
	xycoords='axes fraction', textcoords='offset points')
plt.annotate(p_value, xy=(0,0.94), xytext=(12,-12), va='top',
	xycoords='axes fraction', textcoords='offset points')
plt.show()